* Interpret the rest of this do-file under Stata 16 syntax and behavior.
version 16
* Disable the --more-- pause so the script runs through without waiting for a keypress.
set more off

***************************************************************************************************
*THIS FILE PRODUCES FIGURE I OF SLEMROD-REHMAN-WASEEM-2020, RESTAT
***************************************************************************************************

/* PRELIMINARIES */

* Load the input data, keep observations with year>=2012 and discl==1, and retain
* only the variables used below. A forward slash is used in the path because Stata
* accepts it on every platform, whereas a backslash separator works on Windows only.
use					"$project_data/ITRM_Dislo_Unique_SelfEmployed.dta", clear
keep				if year>=2012
keep				if discl==1
keep				regno year TaxPaid name_count_v2

* namefreq is a copy of name_count_v2; one is a constant used for counting rows.
g					namefreq=name_count_v2
g					one=1

* N = number of observations in each year (denominator of the percentage series).
* total() is the documented egen function; sum() is its older, undocumented name.
bys					year: egen N=total(one)

* Assign each observation to a name-frequency bin coded 1-9.
* Observations that match no bin (namefreq below 1 or missing) keep the code 0.
g					interval=0
replace				interval=1 if namefreq==1
replace				interval=2 if namefreq>1 & namefreq<=5
replace				interval=3 if namefreq>5 & namefreq<=10
replace				interval=4 if namefreq>10 & namefreq<=20
replace				interval=5 if namefreq>20 & namefreq<=50
replace				interval=6 if namefreq>50 & namefreq<=100
replace				interval=7 if namefreq>100 & namefreq<=500
replace				interval=8 if namefreq>500 & namefreq<=5000
* Stata treats missing as larger than any number, so the open-ended top bin must
* exclude missing values explicitly; otherwise they would be counted as ">5000".
replace				interval=9 if namefreq>5000 & !missing(namefreq)

* numbinterv = observations in each year-by-bin cell; fracinterv = that count as a
* percentage of the year's total.
bys					year interval: egen numbinterv=total(one)
g					fracinterv=(numbinterv/N)*100

* index numbers the rows within each year-by-bin cell; the graphs keep index==1 so
* that every cell contributes exactly one plotted point.
bys					year interval: g index=_n
* Graph 1: number of observations (numbinterv) per name-frequency bin, drawn as one
* connected line per year for 2012-2015. The index==1 condition keeps a single row
* per year-by-bin cell. The y-axis ticks sit at raw counts but are labelled in
* thousands, matching the axis title.
* lcolor() is the documented option name for the legacy synonym clcolor().
* The export path uses a forward slash, which Stata accepts on every platform.
* The delimiter is switched to ";" for the multi-line command and restored to
* carriage return afterwards so this section leaves no state behind.
#d					;
twoway
					(connected numbinterv interval if year==2012 & index==1, sort lcolor(brown) mcolor(brown) msymbol(hs) lwidth(thick))
					(connected numbinterv interval if year==2013 & index==1, sort lcolor(navy) mcolor(navy) msymbol(d) lwidth(thick))
					(connected numbinterv interval if year==2014 & index==1, sort lcolor(maroon) mcolor(maroon) msymbol(s) lwidth(thick))
					(connected numbinterv interval if year==2015 & index==1, sort lcolor(green) mcolor(green) msymbol(t) lwidth(thick)),
					ytitle(Number of Taxpayers (000s)) xtitle(Name Frequency)
					ylabel(0 50000 "50" 100000 "100" 150000 "150" 200000 "200")
					xlabel(1 "1" 2 "2-5" 3 "6-10" 4 "11-20" 5 "21-50" 6 "51-100" 7 "101-500" 8 "501-5000" 9 ">5000", angle(90))
					legend(region(style(none)) label(1 "2012") label(2 "2013") label(3 "2014")
					label(4 "2015") rows(1) order(1 2 3 4))
					graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white);
graph 			export "$project_output/NameFrequencyDistribution_Number.eps", replace;
#d					cr

	#d				;
	/* Graph 2: percentage of each year's observations (fracinterv) falling in each
	   name-frequency bin, drawn as one connected line per year for 2012-2015.
	   The index==1 condition keeps a single row per year-by-bin cell.
	   lcolor() is the documented option name for the legacy synonym clcolor().
	   The export path uses a forward slash, which Stata accepts on every platform.
	   The delimiter is restored to carriage return after the export. */
	twoway
					(connected fracinterv interval if year==2012 & index==1, sort lcolor(brown) mcolor(brown) msymbol(hs) lwidth(thick))
					(connected fracinterv interval if year==2013 & index==1, sort lcolor(navy) mcolor(navy) msymbol(d) lwidth(thick))
					(connected fracinterv interval if year==2014 & index==1, sort lcolor(maroon) mcolor(maroon) msymbol(s) lwidth(thick))
					(connected fracinterv interval if year==2015 & index==1, sort lcolor(green) mcolor(green) msymbol(t) lwidth(thick)),
					ytitle(Fraction of Taxpayers (%)) xtitle(Name Frequency) ylabel(0(10)30)
					xlabel(1 "1" 2 "2-5" 3 "6-10" 4 "11-20" 5 "21-50" 6 "51-100" 7 "101-500" 8 "501-5000" 9 ">5000", angle(90))
					legend(region(style(none)) label(1 "2012") label(2 "2013") label(3 "2014")
					label(4 "2015") rows(1) order(1 2 3 4))
					graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white);
graph 			export "$project_output/NameFrequencyDistribution_Fraction.eps", replace;
#d					cr

